"""
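Merge the anchors from the initial and final synteny blocks into
PREFIX.my_anchor.tab, and list the raw Assemblytics anchors that appear in
neither block file in PREFIX.raw_unique_only.tab.

By Lei Gao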
usage: Get_anchor_for_within_SV.py [-h] --Initial_block INITIAL_BLOCK
                                   --Final_block FINAL_BLOCK --Raw_Unique
                                   RAW_UNIQUE --Prefix PREFIX

optional arguments:
  -h, --help            show this help message and exit
  --Initial_block INITIAL_BLOCK
                        Synteny_block.py output raw synteny blocks
  --Final_block FINAL_BLOCK
                        Call_SV_between_anchors.py cleaned and rescued some
                        anchors
  --Raw_Unique RAW_UNIQUE
                        Raw unique anchors generated by Assemblytics
  --Prefix PREFIX       Prefix for outputs

"""
import argparse
from pathlib import Path
from operator import itemgetter
import re
import os

def _parse_args(argv=None):
    """Parse the command line.

    argv: list of argument strings, or None to let argparse read sys.argv[1:].
    Returns the argparse namespace with Initial_block, Final_block,
    Raw_Unique and Prefix.
    """
    parser = argparse.ArgumentParser()
    # Every option is required, so no default is declared: it could never apply.
    parser.add_argument("--Initial_block", type=str, required=True,
                        help="Synteny_block.py output raw synteny blocks")
    parser.add_argument("--Final_block", type=str, required=True,
                        help="Call_SV_between_anchors.py cleaned and rescued some anchors")
    parser.add_argument("--Raw_Unique", type=str, required=True,
                        help="Raw unique anchors generated by Assemblytics")
    parser.add_argument("--Prefix", type=str, required=True,
                        help="Prefix for outputs")
    return parser.parse_args(argv)


def _read_raw_anchors(path):
    """Step 0.0: load the Assemblytics unique-anchor table.

    Expected tab-separated layout, optionally preceded by a column header:

        ref_start ref_end  query_start query_end ref_length query_length ref      query
        24189148  25248284 27054200    28111716  51253844   53272422     OXv7ch01 KOv7ch01

    Returns a tuple (raw_keys, ref_chr_size, qry_chr_size):
      raw_keys      set of whole anchor rows, each re-joined with tabs
      ref_chr_size  reference sequence name -> ref_length (kept as text)
      qry_chr_size  query sequence name -> query_length (kept as text)
    """
    raw_keys = set()
    ref_chr_size = {}
    qry_chr_size = {}
    awaiting_first_row = True

    with open(path) as input_file:
        for line in input_file:
            text = line.strip()
            if not text:
                # A blank (e.g. trailing) line carries no anchor; indexing it
                # would raise IndexError.
                continue
            cells = text.split("\t")
            if awaiting_first_row:
                awaiting_first_row = False
                if not cells[0].isdigit():
                    # Column header: ref_start is not a number. Keeping it
                    # would emit the header as a bogus "raw only" anchor.
                    continue
            raw_keys.add("\t".join(cells))
            ref_chr_size[cells[6]] = cells[4]
            qry_chr_size[cells[7]] = cells[5]

    return raw_keys, ref_chr_size, qry_chr_size


def _block_keys(block_file, first_col, ref_chr_size, qry_chr_size):
    """Yield one anchor key per data row of an open synteny-block table.

    block_file:   open text handle; its first line is skipped as the header.
    first_col:    index of the column holding the reference name. Relative to
                  it the columns read are +0 ref, +1 ref_start, +2 ref_end,
                  +4 query, +5 query_start, +6 query_end.
    ref_chr_size, qry_chr_size: name -> length tables from the raw anchors.

    Keys use the same column order as the raw anchor rows so the two sets can
    be compared directly. Raises KeyError when a row names a sequence that is
    absent from the raw anchor table.
    """
    next(block_file, None)  # drop the header line
    for line in block_file:
        text = line.strip()
        if not text:
            continue  # tolerate blank lines instead of failing on cells[...]
        cells = text.split("\t")[first_col:first_col + 8]
        ref, ref_start, ref_end = cells[0], cells[1], cells[2]
        query, query_start, query_end = cells[4], cells[5], cells[6]
        yield "\t".join([ref_start, ref_end, query_start, query_end,
                         ref_chr_size[ref], qry_chr_size[query], ref, query])


def main(argv=None):
    """Write PREFIX.my_anchor.tab and PREFIX.raw_unique_only.tab.

    argv: optional argument list (defaults to the process command line).

    my_anchor.tab receives every anchor of the initial block file followed by
    the anchors of the final block file that were not already seen.
    raw_unique_only.tab receives, sorted as text, the raw Assemblytics anchors
    that occur in neither block file.
    """
    args = _parse_args(argv)
    raw_keys, ref_chr_size, qry_chr_size = _read_raw_anchors(args.Raw_Unique)

    # Step 1.0: collect the anchors present in the block files.
    my_set = set()
    with open(args.Initial_block) as infile, \
            open(args.Final_block) as infile2, \
            open(args.Prefix + ".my_anchor.tab", "w") as outfile:
        # Initial blocks: the reference name is the first column.
        for key in _block_keys(infile, 0, ref_chr_size, qry_chr_size):
            my_set.add(key)
            outfile.write(key + "\n")
        # Final blocks carry two extra leading columns; only anchors that the
        # initial blocks did not already provide are appended.
        for key in _block_keys(infile2, 2, ref_chr_size, qry_chr_size):
            if key not in my_set:
                my_set.add(key)
                outfile.write(key + "\n")

    with open(args.Prefix + ".raw_unique_only.tab", "w") as outfile:
        outfile.writelines(key + "\n" for key in sorted(raw_keys - my_set))


if __name__ == "__main__":
    main()





#srf
